
/************************************************************************************
 *______________________________________Volume______________________________________*
 ************************************************************************************/

/*
PURPOSE:

    The goal of this script is to calculate the volume executed on any of the Top 8
    exchanges for symbol-dates in 2015 from the Daily TAQ Trades file.
    This script also carries the option of calculating volume at finer time intervals than the
    trading day (e.g. 5 min, 1 hour, 1 min, etc.).
    A row in the output of this script would be, for example, the share and dollar volume on NYSE
    for SPY on March 8, 2015.

PRIMARY INPUT:

    The Daily TAQ Trades file is a dataset where a row is a single trade. Each row contains
    the date the trade was executed, the timestamp (millisecond precision before 8/3/2015,
    microsecond precision afterwards), the symbol root (name) and symbol suffix (equity class),
    the exchange where the trade was executed, the trade price and quantity,
    and a trade sale condition that describes the context of the trade (e.g. a stock-option trade,
    a next day trade, an intermarket sweep).

APPROACH:

    We calculate volume from the subset of "regular" trades in the Daily TAQ Trades file.
    Regular trades are:

    1. The trade had a valid record and was not been cancelled or corrected.
    2. The trade occurred on exchange and not on FINRA's Alternative Display Facility
    3. The trade occurred during regular trading hours (adjusted for halfdays)
    4. The trade took place under normal conditions. We identify such trades labelled by the
    trade sale condition as "Regular", "Odd Lot", "Intermarket Sweep", and any valid combination
    of the three. We exclude all trades that include any other trade conditions.
    For more context on the trade sale condition code, please consult "PLACEHOLDER README"

    The procedure can be described as follows.

    1. First, we load the Daily TAQ Trades file for a single day and identify regular trades
    with the flag f_Vol_Regular.

    2. Second, we group by date (interval), symbol, and exchange and add up all the volume contained
    in the regular trades that occurred within this date-symbol-exchange (date-interval-symbol-exchange)
    combination. An example would be the regular volume transacted on NYSE for SPY on 5/8/2015
    (supposing the time interval is the trading day). We output a row for each
    date-symbol-exchange (date-interval-symbol-exchange) combination.

    3. We repeat the same procedure for every day in every month in 2015.

SUPPLEMENTAL INPUT FILES:

    "Server_Directories.xlsx"
    "Holidays.xlsx"

PREREQ:

    Find the statement that declares "Interval_Length". Set this to the time interval
    length of your choice using the format ("hh:mm:ss.xxx"t).
    The default is "06:30:00.000", which is the length of a full trading day.
    For halfdays, the interval length is set to "4:00:00.000", which is the length
    of the trading day during halfdays.

INSTRUCTION:

    Find the wrapper "Volume.sh". Make sure that in this file, the argument after
    "-sysparm" is "2015". Submit the wrapper on the WRDS server.
*/


/*****************************************************************************************
 *_______________________________________INITIALIZE______________________________________*
 *****************************************************************************************/

/*
 * WRDS LIBRARY:
 *
 * This sets up the wharton library, which allows us to access the CRSP and TAQ datasets.
 */
%include '/wrds/lib/utility/wrdslib.sas';
options mprint;

/*
 * IMPORT INPUT PARAMETERS:
 *
 * Make sure to enter the path to your input directory below.
 */
proc import datafile="/home/uchicago/eche/sasuser.v94/WRDS/Input/Server_Directories" out= server_directories dbms = xlsx replace;
run;
proc import datafile="/home/uchicago/eche/sasuser.v94/WRDS/Input/Holidays" out= holidays dbms = xlsx replace;
run;

data holiday_import;
    set holidays;

    if holiday = "New_Years" then do; call symput('New_Years', trim(date)); end;
    if holiday = "MLK_Day" then do; call symput('MLK_Day', trim(date)); end;
    if holiday = "Presidents_Day" then do; call symput('Presidents_Day', trim(date)); end;
    if holiday = "Good_Friday" then do; call symput('Good_Friday', trim(date)); end;
    if holiday = "Memorial_Day" then do; call symput('Memorial_Day', trim(date)); end;
    if holiday = "Independence_Day" then do; call symput('Independence_Day', trim(date)); end;
    if holiday = "Labor_Day" then do; call symput('Labor_Day', trim(date)); end;
    if holiday = "Thanksgiving" then do; call symput('Thanksgiving', trim(date)); end;
    if holiday = "Christmas" then do; call symput('Christmas', trim(date)); end;
    if holiday = "Hurricane_Sandy" then do; call symput('Hurricane_Sandy', trim(date)); end;
    if holiday = "Halfdays" then do; call symput('Halfdays', trim(date)); end; /* e.g. Christmas Eve, July 3rd */
run;

data directory_import;
    set server_directories;

    if directory = "user_directory" then do; call symput('userdir', trim(path)); end;
    if directory = "volume_directory" then do; call symput('voldir', trim(path)); end;
    if directory = "temp_directory" then do; call symput('tempdir', trim(path)); end;
run;

/**
 * Directories:
 *
 * Sets up the directories according to the paths in the "Server_Directories.xlsx" file.
 */
libname userdir &userdir;
libname voldir &voldir;
libname tempdir &tempdir;

/* Set user_path for import/export
 * We need to dequote the path passed in from "Server_Directories.xlsx"
 */
%let user_path  = %qsysfunc(dequote(&userdir));


/**********************************************************************************************
 *______________________________________GLOBAL VARIABLES______________________________________*
 **********************************************************************************************/

/*
 * Specify the year for which to calculate volume.
 * Uncomment this line to run from the SAS Studio website.
 */
/* %let sysparm = "2015"; */

/* Obtain the year from the wrapper. */
%let YEAR_INPUT = &SYSPARM.;

/* Set interval length. */
%let interval_length_input = "06:30:00.000"t;

/* Trade beginning and end times. */
%let trade_begin_time = "09:30:00.000"t;
%let trade_end_time_reg = "16:00:00.000"t;
%let trade_end_time_halfday = "13:00:00.000"t;

/* Length of the trading day. */
%let regular_trading_day_length = "06:30:00.000"t;
%let halfday_trading_length = "03:30:00.000"t;

/* Set constant to convert milliseconds into microseconds. */
%let micro_to_milliseconds = 1000;

/***********************************************************************************
 *______________________________________MACRO______________________________________*
 ***********************************************************************************/

/*
 * _MACRO_ : DailyVolume(YYYYMMDD)
 *
 * PARAM: YYYYMMDD: The day for which this macro calculates volume
 *
 * This Macro first loads the Daily TAQ Trades file and identifies the trades
 * which are considered when calculating volume.
 * These trades are marked with "f_Vol_Regular" and the description for this flag
 * can be found below.
 *
 * Then we iterate through each interval (specified in "Global Variables") and
 * calculate the volume traded within each symbol-exchange pair for every interval.
 * We end up with a symbol-date(interval)-exchange level volume dataset.
 *
 * OUTPUT: Day_Vol_&YYYYMMDD, symbol-date(interval)-exchange level volume dataset
 */
%Macro DailyVolume(YYYYMMDD);
    /**
     * INPUT: taqmsec.ctm_&YYYYMMDD, Daily TAQ Trades File
     *
     * Loads the Daily TAQ Trades file and identifies valid trades with the regular volume flag.
     * Later, we aggregate volume from these trades.
     * We also assign each trade to a time interval, specified by "Interval_Length".
     *
     * OUTPUT: _trades, Daily TAQ Trades File with volume flags
     */
    data _trades;
        set taqmsec.ctm_&YYYYMMDD.;

        /* Relabel trades with exchange code 'Q' to exchange code 'T' (combine Nasdaq codes). */
        if ex = "Q" then ex = "T";

        /*
         * ______________________________________REGULAR VOLUME______________________________________
         * Here we identify all the trades that we consider as regular volume.
         *
         * 1. f_Valid_Record : f_Valid_Record indicates that the entry in the trades dataset
         * contains the correct record of that trade.
         *
         * 2. f_On_Exchange : f_On_Exchange indicates that the trade was executed on an exchange
         * and not on FINRA's Alternative Display Facility.
         *
         * 3. f_Regular_Hours : f_Regular_Hours indicates that the trade was executed during
         * regular trading hours, which are adjusted during halfdays. This also excludes trades
         * conducted on holidays.
         *
         * 4. f_Regular_Trade : f_Regular_Trade indicates that the trade took place under regular
         * conditions. These are trades labelled as "Regular Trades", "Intermarket Sweeps",
         * and "Odd Lot", and any valid combination of the three.
         */

        /* f_Valid_Record
         *
         * Trade Correction Codes:
         *
         * "00" : Regular trade which was not corrected, changed or signified as cancel or error.
         * "01" : Original trade which was late corrected (this record contains the original time
         * and the corrected data for the trade).
         */

        f_Valid_Record = tr_corr in ("00", "01");

        /*
         * f_On_Exchange
         *
         * Exchange Code: "D": FINRA Alternative Display Facility.
         * (Note that this constraint might not bind if D is not included in the exchange list).
         */
        f_On_Exchange = ex ne "D";

        /* f_Regular_Hours
         *
         * Regular trading hours are between 9:30 am and 4:00 pm. Execeptions are holidays and halfdays.
         * Incidents (e.g. Flash Crash, Hurricane Sandy) are dropped in the MonthlyVolume Macro.
         *
         * Holidays:
         * New Years, MLK, Presidents Day, Good Friday, Memorial Day, Independence Day, Labor Day,
         * Thanksgiving, Christmas.
         *
         * Halfdays:
         * Usually around December 24, November 27, and July 3, right before major holidays.
         * On a half day, trading occurs from 9:30 am to 1:00 pm.
         */
        f_New_Years_Day = date in &New_Years;
        f_MLK_Day=date in &MLK_Day;
        f_Presidents_Day=date in &Presidents_Day;
        f_Good_Friday=date in &Good_Friday;
        f_Memorial_Day=date in &Memorial_Day;
        f_Indpdnce_Day=date in &Independence_Day;
        f_Labor_Day=date in &Labor_Day;
        f_Thanksgiving=date in &Thanksgiving;
        f_Christmas_Day=date in &Christmas;
        f_Holiday=f_New_Years_Day or f_MLK_Day or f_Presidents_Day or f_Good_Friday
            or f_Memorial_Day or f_Indpdnce_Day or f_Labor_Day or f_Thanksgiving or
            f_Christmas_Day;

        /* Halfdays */
        f_Halfday=date in &Halfdays;

        f_Regular_Hours=((~f_Holiday      &
                            ~f_Halfday      &
                            time_m > &trade_begin_time  &
                            time_m < &trade_end_time_reg) or
                          (~f_Holiday      &
                            f_Halfday      &
                            time_m > &trade_begin_time &
                            time_m < &trade_end_time_halfday));

        /* f_Regular_Trade
         *
         * Trade Condition Codes:
         *
         * '@' = Regular Trade
         * 'I' = Odd lot
         * 'FI' = Intermarket Sweep and Odd Lot (CTA Only)
         * 'F' = Intermarket Sweep (CTA Only)
         * '@F' = Regular Trade and an Intermarket Sweep (UTP Only)
         * '@FI' = Regular Trade, Intermarket Sweep and Odd Lot (UTP Only)
         * '@I' = Regular Trade and Odd Lot (UTP Only)
         * '' = Regular Trade (UTP Only)
         *
         * The UTP only and CTA only notes are observations in 2015 but not requirements for the code.
         */

        /*
         * We remove the white space in the trade condition
         * to identify the code combinations for our regular trades.
         */
        tr_scond_no_space = compress(tr_scond);

        f_Regular_Trade = tr_scond_no_space = "@" or
                               tr_scond_no_space = "I" or
                               tr_scond_no_space = "F" or
                               tr_scond_no_space = "FI" or
                               tr_scond_no_space = "@F" or
                               tr_scond_no_space = "@FI" or
                               tr_scond_no_space = "@I" or
                               tr_scond_no_space = "E" or
                               tr_scond_no_space = "FE" or
                               tr_scond = "";

        /* Combine flags to create the regular volume flag. */
        f_Vol_Regular = f_Valid_Record &
                        f_On_Exchange &
                        f_Regular_Hours &
                        f_Regular_Trade;

        /* Convert the time to milliseconds. */
        time_m = floor(time_m * &micro_to_milliseconds) / &micro_to_milliseconds;

        /*
         * If it is a half day and if the interval is equal to a full trading day,
         * we adjust the interval length to be the length of a half day.
         */
        if f_Halfday then do;
            if &interval_length_input = &regular_trading_day_length then do;
                interval_length = &halfday_trading_length;
            end;
        end;
        else do;
            interval_length = &interval_length_input;
        end;

        /*
         * Calculate which interval a trade falls in from the millisecond timestamp.
         * For a 6.5 hr interval (length of the trading day), all trades are in interval 0.
         */
        interval =  floor((time_m - &trade_begin_time)/interval_length);

        format time_m time12.3;
    run;

    /*
     * Restrict the trades dataset to Regular Hours trades.
     * We also remove the 'Q' exchange since all of its trades are now under exchange code 'T'.
     * We also remove the 'D' exchange because that corresponds with the volume
     * traded on FINRA's ADF. We do not count this volume.
     *
     * Sort the trades by date, interval, symbol, and exchange.
     */
    data _regular_hours_trades;
        set _trades;
        where (f_Regular_Hours = 1) and ex not in ('Q', 'D');

        proc sort;
            by date interval sym_root sym_suffix ex;
    run;

    /**
     * INPUT: _regular_hours_trades, sorted Daily TAQ Trades file with volume flags
     *
     * Aggregates the volume traded on each exchange for every symbol-interval.
     * Note that this will create a record for each exchange only if an exchange has
     * non-zero trading of a symbol for a given date and interval.
     *
     * OUTPUT: _Sym_Ex_Interval_Vol, symbol-date(interval)-exchange volume data
     */
    data _Sym_Ex_Interval_Vol;
        set _regular_hours_trades;

        by date interval sym_root sym_suffix ex;

        /*
         * We set up an array to store the volume measures for each exchange.
         * The first entry stores the share volume, the second entry stores the dollar volume.
         * We save these numbers in an array for code simplification.
         */
        array Volume(2);

        /* retain saves the volume variables across iterations */
        retain Volume1-Volume2 .;

        /**
         * ______________________________________FIRST TRADE OF THE EXCHANGE______________________________________
         */

        /*
         * For the first trade of the symbol-interval-exchange combination,
         * we reset the volume variables to 0.
         */
        if first.ex then do;
            Volume(1) = 0;
            Volume(2) = 0;
        end;

        /**
         * ______________________________________FOR EVERY TRADE ON THE EXCHANGE______________________________________
         */

        /* Sum up the share and dollar volume from regular trades. */
        if f_Vol_Regular then do;
            Volume(1) = sum(Volume(1), size);
            Volume(2) = sum(Volume(2), size * price);
        end;

        /**
         * ______________________________________LAST TRADE OF THE EXCHANGE______________________________________
         */

        /*
         * We only save the row of the last trade of trades data
         * to get a single row for each symbol-date(interval)-exchange combination.
         */
        if last.ex;

        Volume_S = Volume(1);
        Volume_D = Volume(2);
        output;

        keep date interval sym_root sym_suffix ex Volume_S Volume_D;
    run;


    /*** Save the daily files to the volume directory. ***/
    data voldir.Day_Vol_&YYYYMMDD.;
        set _Sym_Ex_Interval_Vol;
    run;

    /* Clear work library. */
    proc datasets library = work;
        delete _trades;
        delete _regular_hours_trades;
        delete _Sym_Ex_Interval_Vol;
    run;

%Mend;

/*
 * _MACRO_ : MonthlyVolume(YYYYMM)
 *
 * PARAM: YYYYMM: The month for which this macro aggregates volume.
 *
 * REQUIRES: DailyVolume
 *
 * This Macro takes the TAQ Volume for a given month, specified by YYYYMM,
 * and iterates through all the days in that month, specified from the "TAQMSEC" library.
 * It iterates through all the days in the month,
 * and for each day applies the DailyVolume Macro.
 *
 * OUTPUT: Month_Vol_&YYYYMM, symbol-date(interval)-exchange level volume dataset
 */
%Macro MonthlyVolume(YYYYMM);

    /*
     * INPUT: sashelp.vtable, this is a table containing a row for all available datasets.
     * There's a column for "libname", the library the dataset belongs to, and "memname",
     * which is the name of the dataset.
     *
     * Extracts the Daily TAQ Trades filenames for each day in the month.
     *
     * OUTPUT: Daily_Trades_Filenames, list of filenames
     */
    proc sql;
        create table Daily_Trades_Filenames
        as select
        libname,
        memname
        from sashelp.vtable where
        libname = "TAQMSEC" and memname like "CTM_&YYYYMM.%";
    quit;

    /*
     * Counts the number of filenames in Daily_Trades_Filenames as "nobs".
     * Save a list of the filenames as "file_list", separated by "|".
     * These are used to iterate over the valid trading days within the month.
     */
    proc sql print;
        select count(*) into: nobs from Daily_Trades_Filenames;
        select memname into: file_list separated by '|' from Daily_Trades_Filenames;
    quit;

    /* Loop over days and run DailyVolume for each valid trading day. */
    %do i=1 %to &nobs.;

        /* Get the name of the days and the daily file names in the month. */
        %let ctm_file  = %scan(%quote(&file_list.), &i., '|');

        /* The date is extracted from the filename by taking the content after "CTM_". */
        %let YYYYMMDD  = %substr(&ctm_file., 5, 8);

        /* Apply DailyVolume Macro. */
        %DailyVolume(YYYYMMDD=&YYYYMMDD.);
    %end;

    /*** Save the monthly files to the volume directory. ***/
    data voldir.Month_Vol_&YYYYMM.;
        set voldir.Day_Vol_&YYYYMM.:;
    run;

    /* Delete the daily files in the volume directory. */
    proc datasets library = voldir;
        delete Day_Vol_&YYYYMM.:;
    run;
%Mend;

/*
 * _MACRO_ : AnnualVolume(YYYY)
 *
 * PARAM: YYYY: The year for which this macro aggregates volume.
 *
 * REQUIRES: MonthlyVolume
 *
 * This Macro takes the TAQ Volume for a given year, specified by YYYY,
 * and iterates through all the months in that year.
 * For every month it applies the MonthlyVolume Macro.
 *
 * Since Reg NMS was implemented in October 2007, for that year we will start from October if 2007 is requested.
 *
 * OUTPUT: Volume_&YYYY, symbol-date(interval)-exchange level volume dataset
 */
%Macro AnnualVolume(YYYY);
    %let month_list = 01|02|03|04|05|06|07|08|09|10|11|12;

    /*
     * Iterate across all months in the year for years after 2007.
     * Run MonthlyVolume for every month in the year.
     */
    %if (&YYYY. > 2007) %then %do;
        %do m=1 %to 12;
            %let month = %scan(%quote(&month_list.), &m., '|');
            %put ##### Processing &month. &YYYY.;
            %MonthlyVolume(YYYYMM=&YYYY.&month.);
        %end;
    %end;

    /* For 2007, start data collection in october when REG NMS starts. */
    %if (&YYYY.=2007) %then %do;
        %do m=10 %to 12;
            %let month = %scan(%quote(&month_list.), &m., '|');
            %put ##### Processing &month. &YYYY.;
            %MonthlyVolume(YYYYMM=&YYYY.&month.);
        %end;
    %end;

    /*** Save the annual files to the volume directory. ***/
    data voldir.Volume_&YYYY.;
        set voldir.Month_Vol_&YYYY.:;
    run;

    /* Save in temporary directory too */
    data tempdir.Volume_&YYYY.;
        set voldir.Volume_&YYYY.;
    run;

    /* Delete the monthly files in the volume directory. */
    proc datasets library = voldir;
        delete Month_Vol_&YYYY.:;
        run;
    quit;

    proc export data = voldir.Volume_&YYYY.
        outfile = "&user_path./volume_&YYYY..csv"
        dbms = csv
    replace;

%Mend;

/**********************************************************************************
 * ______________________________________RUN______________________________________*
 **********************************************************************************/

%AnnualVolume(&YEAR_INPUT.);
